# libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import plotly_express as px
# importing all needed files
pop = pd.read_excel("/Users/silas/Documents/College/Spring 2022/Anthro Homework Research/Homework 5/Pop edited.xlsx")
poll_df = pd.ExcelFile("/Users/silas/Documents/College/Spring 2022/Anthro Homework Research/Homework 5/Pollution.xlsx")
poll_unorder = pd.read_excel(poll_df, 'Main', usecols=[*range(0,3)])
poll = poll_unorder.sort_values(by=['Year', 'State'])
income = pd.read_excel("/Users/silas/Documents/College/Spring 2022/Anthro Homework Research/Homework 5/Income.xlsx")
suicide = pd.read_excel("/Users/silas/Documents/College/Spring 2022/Anthro Homework Research/Homework 5/Suicide.xlsx")
# merging population, income, CO pollution
merge1 = pd.merge(pop, income, on=['Year', 'State'])
merge1
pop_poll = pd.merge(merge1, poll, on=['Year', 'State'])
pop_poll
| State | Year | Pop | Income | CO | |
|---|---|---|---|---|---|
| 0 | AL | 2000 | 4452339 | 51798 | 1435.409350 |
| 1 | AK | 2000 | 627500 | 77275 | 154.916970 |
| 2 | AZ | 2000 | 5165993 | 58172 | 1037.815130 |
| 3 | AR | 2000 | 2678511 | 43424 | 765.290420 |
| 4 | CA | 2000 | 34002756 | 68456 | 5828.319510 |
| ... | ... | ... | ... | ... | ... |
| 1045 | VA | 2020 | 8590563 | 81947 | 480.646957 |
| 1046 | WA | 2020 | 7693612 | 81083 | 429.406833 |
| 1047 | WV | 2020 | 1784787 | 51615 | 114.621768 |
| 1048 | WI | 2020 | 5832655 | 67094 | 321.539561 |
| 1049 | WY | 2020 | 582328 | 65108 | 61.977679 |
1050 rows × 5 columns
# merging population, income, suicide
pop_suicide = pd.merge(merge1, suicide, on=['Year', 'State'])
pop_suicide
| State | Year | Pop | Income | Suicide | |
|---|---|---|---|---|---|
| 0 | AL | 2005 | 4557808 | 47885 | 11.5 |
| 1 | AK | 2005 | 663661 | 72041 | 19.9 |
| 2 | AZ | 2005 | 5939292 | 58319 | 16.4 |
| 3 | AR | 2005 | 2779154 | 47250 | 14.3 |
| 4 | CA | 2005 | 36132147 | 66710 | 9.1 |
| ... | ... | ... | ... | ... | ... |
| 395 | VA | 2020 | 8590563 | 81947 | 13.5 |
| 396 | WA | 2020 | 7693612 | 81083 | 15.2 |
| 397 | WV | 2020 | 1784787 | 51615 | 19.4 |
| 398 | WI | 2020 | 5832655 | 67094 | 14.5 |
| 399 | WY | 2020 | 582328 | 65108 | 30.5 |
400 rows × 5 columns
# merging all variables
df_all = pd.merge(pop_poll, suicide, on=['Year', 'State'])
df_all
| State | Year | Pop | Income | CO | Suicide | |
|---|---|---|---|---|---|---|
| 0 | AL | 2005 | 4557808 | 47885 | 827.544647 | 11.5 |
| 1 | AK | 2005 | 663661 | 72041 | 133.547307 | 19.9 |
| 2 | AZ | 2005 | 5939292 | 58319 | 697.087586 | 16.4 |
| 3 | AR | 2005 | 2779154 | 47250 | 522.034956 | 14.3 |
| 4 | CA | 2005 | 36132147 | 66710 | 2810.511599 | 9.1 |
| ... | ... | ... | ... | ... | ... | ... |
| 395 | VA | 2020 | 8590563 | 81947 | 480.646957 | 13.5 |
| 396 | WA | 2020 | 7693612 | 81083 | 429.406833 | 15.2 |
| 397 | WV | 2020 | 1784787 | 51615 | 114.621768 | 19.4 |
| 398 | WI | 2020 | 5832655 | 67094 | 321.539561 | 14.5 |
| 399 | WY | 2020 | 582328 | 65108 | 61.977679 | 30.5 |
400 rows × 6 columns
# animation from 2000 to 2020 for income v CO pollution
px.scatter(data_frame=pop_poll, x="Income", y="CO", animation_frame="Year", animation_group="State",
size="Pop", color="State", hover_name="State", title="Income VS CO Pollution", size_max=40)
# animation from 2004, 2014-2020 comparing imcome with suicide over time
px.scatter(data_frame=pop_suicide, x="Income", y="Suicide", animation_frame="Year", animation_group="State",
size="Pop", color="State", hover_name="State", range_x=[40000,90000], range_y=[0,30],
title="Income VS Suicide", size_max=40)
# this was somewhat of an experimental animation, tried to find a way to compare all 6 varibles
# (State, Year, Pollution, Suicide, Income, and Population) and it worked out better than I expected
px.scatter(df_all, x="CO", y="Suicide", animation_frame="Year", animation_group="Income",
size="Pop", color="Income", hover_name='State', range_x=[0, 1500],range_y=[5, 30],
title="CO Pollution VS Suicide", size_max=40)